#
#
# Figs234.R. Create Figures 2, 3, and 4.
#
# Seth Hill, sjhill@ucsd.edu
#


#rm(list=ls())
# On Windows only, try to make the directory holding this script the working
# directory, so the relative data and figure paths used below resolve next to
# the script. (`OS.type` is spelled out; the short form `$OS` only worked
# through partial matching of list names.)
if (.Platform$OS.type == "windows") {
  .doit <- function() {
    # Read the `ofile` variable from every active call frame and keep the
    # frames that define it. Presumably this is the frame of source(), which
    # stores the path of the file being sourced there -- TODO confirm.
    frame_files <- lapply(sys.frames(), function(x) x$ofile)
    frame_files <- Filter(Negate(is.null), frame_files)
    # Use the last match. When no frame defines `ofile` the indexing below
    # raises an error, which the try() around the call absorbs.
    setwd(dirname(frame_files[[length(frame_files)]]))
  }
  # Best effort: on failure the working directory is left untouched and the
  # script continues without any message.
  try(.doit(), silent = TRUE)
}

library(bit64)
library(data.table)
library(RColorBrewer)
# Replace the default colour palette with eight of the nine "Set1" colours
# (the sixth one is skipped).
palette(brewer.pal(n = 9, name = "Set1")[c(1:5, 7:9)])
options(digits = 3)

# Row limit passed to fread() for the review-level file. Leave at -1 for a
# full run; set a positive count (e.g. 5e06) to debug on a truncated read.
# A warning at the end of the script fires whenever this is not -1.
nrow <- -1

# Inputs created in CleanAndSubsetData.R.
# Individual-level reviews.
DT <- fread("all_reviews_cleaned.csv", nrows = nrow)
# Reviews aggregated to the date.
DT3 <- fread("reviews_by_date.csv")

#################################
# Plots
#################################

#
# Figure 2. Scatter of bimonthly average dissatisfaction on average bureaucracy mentions.
#

# Dissatisfaction is the overall rating reversed: 5 - overall.
DT[, dissat := 5 - overall]
# Year and month of each review, then a two-month bin
# (months 1-2 -> 1, 3-4 -> 2, ..., 11-12 -> 6).
# NOTE(review): format() with "%Y" / "%m" presumes fread() delivered date2 as
# a date class rather than as character -- confirm.
DT[, c("year", "month") := list(
  as.numeric(format(date2, "%Y")),
  as.numeric(format(date2, "%m"))
)]
DT[, bimonth := ceiling(month / 2)]
# For each year x bimonth bin: mean of dissat, mean of bureau_title, and the
# number of rows in the bin (referenced as N further down).
MT <- DT[, c(lapply(.SD, mean, na.rm = TRUE), .N),
         keyby = c("year", "bimonth"),
         .SDcols = c("dissat", "bureau_title")]
# Linearly rescale x onto the interval [from, to]; used to make point size
# proportional to the number of reviews.
#
# Arguments:
#   x     numeric vector to rescale; NA entries stay NA in the result.
#   from  output value assigned to the smallest non-missing x.
#   to    output value assigned to the largest non-missing x.
#
# Returns a numeric vector of the same length as x.
#
# Edge cases:
#   * Empty or all-NA input yields NA for every element (no min/max warnings).
#   * When every non-missing value is identical (this includes a single
#     value) there is no spread to map, so each non-missing element gets the
#     midpoint of [from, to] instead of the NaN that 0 / 0 would produce.
ptSize <- function(x, from = .8, to = 3) {
  if (length(x) == 0L || all(is.na(x))) {
    return(rep(NA_real_, length(x)))
  }
  x_min <- min(x, na.rm = TRUE)
  x_max <- max(x, na.rm = TRUE)
  x_span <- x_max - x_min
  # isTRUE() keeps the test safe when the span is NaN (e.g. infinite input).
  if (isTRUE(x_span == 0)) {
    return(ifelse(is.na(x), NA_real_, (from + to) / 2))
  }
  from + (x - x_min) / x_span * (to - from)
}
# Point sizes for the bins with more than 500 reviews, scaled on sqrt(N).
MT[N > 500, pt_cex := ptSize(sqrt(N))]

# Draw Figure 2 into a 10 x 6 inch PDF.
f <- "Figure_2.pdf"
pdf(f, width = 10, height = 6)
par(mar = c(4.1, 8.1, 1.1, 4.1), cex.axis = 1.2)
# First pass sets up the coordinate system (log-scaled x, no axes or labels).
MT[N > 500, plot(x = bureau_title, y = dissat, pch = 1, cex = pt_cex,
                 ann = FALSE, axes = FALSE, log = "x")]
grid()
# Draw the points again so they sit on top of the grid lines.
MT[N > 500, points(x = bureau_title, y = dissat, pch = 1, cex = pt_cex)]
# Loess trend through the same bins.
s <- MT[N > 500, loess.smooth(x = bureau_title, y = dissat, span = 3 / 4)]
lines(x = s$x, y = s$y, lwd = 5, col = 4)

# Axes: hand-picked x ticks printed with four decimals; default y ticks with
# horizontal labels.
x_at <- c(.0005, .001, .0025, .005)
axis(1, at = x_at, labels = sprintf("%1.4f", x_at))
axis(2, las = 2)
title(xlab = "Bimonthly Average Rate Bureaucracy Mentioned (log scale)", cex.lab = 1.2)
mtext("Bimonthly\nAverage\nDissatis-\nfaction", side = 2, line = 3, las = 1, cex = 1.2)

dev.off()


#
# Figure 3. Heatmaps.
#
# Bin the reviews of companies with more than 500 reviews by the percentile
# rank of firm_bureau_title. Ranks are taken over the rows of DT2 and divided
# by the row count; the cut points give the bins 0-50, 50-75, 75-90 and
# 90-100.
DT2 <- DT[firm_n_reviews > 500]
DT2[, firm_bureau_category := cut(
  frank(firm_bureau_title, ties.method = "average") / .N,
  breaks = c(0, 0.5, 0.75, 0.9, 1),
  labels = FALSE, # return integer bin codes 1-4 rather than labelled levels
  include.lowest = TRUE,
  ordered_result = TRUE
)]
# Reverse the codes (1 <-> 4, 2 <-> 3) so that more bureaucracy means a lower
# code.
DT2[, firm_bureau_category := 5 - firm_bureau_category]
# Check: range of firm_bureau_title inside each category.
print(DT2[, .(minf = min(firm_bureau_title), maxf = max(firm_bureau_title)),
          keyby = "firm_bureau_category"])

# Draw a heatmap of the yvar-by-xvar cross-tabulation of DT into a PDF file.
#
# Arguments:
#   DT         Table-like object; the two columns are read with DT[[yvar]] and
#              DT[[xvar]]. Their values must convert with as.numeric(), since
#              they are used as plotting coordinates.
#   yvar       Name of the column shown on the vertical axis (ticks 1 to 5).
#   xvar       Name of the column shown on the horizontal axis.
#   xlab       Title of the x axis. It also selects the x tick labels:
#              "Compensation & Benefits Rating" labels ticks 1 to 5,
#              "Rate of Bureaucracy Mentions in Company" labels ticks 1 to 4
#              with category names; any other value draws no x tick labels.
#   cell_text  "column" prints each cell as a percentage of its column; any
#              other non-NULL value prints it as a percentage of the whole
#              table; NULL prints no text.
#   pdf_name   Path of the PDF file to write (10 x 6 inches).
#
# Side effects: writes the PDF and prints the column-percentage matrix.
# Returns the table() of counts that the heatmap shows.
myHeatmap <- function(DT, yvar = "overall", xvar = "comp_benefits",
                      xlab = "Compensation & Benefits Rating",
                      cell_text = "table", pdf_name = "Figure3.pdf") {
  # Open the PDF and register its closing right away, so the device is also
  # released when a later step fails (e.g. image() on an empty subset).
  # Otherwise it would stay open and capture every subsequent plot.
  pdf(pdf_name, width = 10, height = 6)
  pdf_device <- dev.cur()
  on.exit(dev.off(pdf_device), add = TRUE)
  par(mar = c(3.1, 7.1, .6, 1.1), cex.axis = 1.2)

  # Counts, then each cell as a share of the table and of its own column.
  heatmap_data <- table(DT[[yvar]], DT[[xvar]])
  heatmap_matrix <- as.matrix(heatmap_data)
  cell_percent <- 100 * heatmap_matrix / sum(heatmap_matrix)
  col_percent <- 100 * sweep(cell_percent, 2, colSums(cell_percent), "/")
  print(col_percent)

  the_palette <- brewer.pal(n = 9, name = "YlGnBu")

  # The observed values of the two variables are the cell centres.
  x_values <- as.numeric(colnames(heatmap_matrix))
  y_values <- as.numeric(rownames(heatmap_matrix))
  # image() wants z indexed as [x, y], hence the transpose.
  image(x = x_values, y = y_values, z = t(heatmap_matrix),
        col = the_palette, ann = FALSE, axes = FALSE)

  # Axis labels.
  axis(2, at = 1:5, labels = 1:5, tick = FALSE, las = 2)
  if (xlab == "Compensation & Benefits Rating") {
    axis(1, at = 1:5, labels = 1:5, tick = FALSE)
  } else if (xlab == "Rate of Bureaucracy Mentions in Company") {
    # Codes run from most (1) to least (4) bureaucracy. The third label used to
    # read "75-95th", which overlapped "Top Tenth"; the category bins used in
    # this script break at the 90th percentile.
    axis(1, at = 1:4,
         labels = rev(c("Bottom Half", "Third Quartile",
                        "75-90th\nPercentile", "Top Tenth")),
         tick = FALSE)
  }

  title(xlab = xlab, line = 2, cex.lab = 1.2)
  mtext("Overall\nFive-Star\nRating", side = 2, line = 2, las = 1, cex = 1.2)

  if (!is.null(cell_text)) {
    # Percentage printed in each cell, relative to the column or to the table.
    shown_percent <- if (cell_text == "column") col_percent else cell_percent
    # White text on the darkest cells (counts above the 90th percentile of all
    # cell counts), black elsewhere. The cutoff is computed once for the table.
    dark_cutoff <- quantile(heatmap_matrix, .9)
    text_color <- ifelse(as.vector(heatmap_matrix) > dark_cutoff,
                         "white", "black")
    # One vectorised call; every argument is in column-major cell order.
    text(x = x_values[col(heatmap_matrix)],
         y = y_values[row(heatmap_matrix)],
         labels = paste0(round(shown_percent, 0), "%"),
         col = text_color, cex = 1.2)
  }

  heatmap_data
}


# One heatmap per subset of reviews, each with the default axes (overall
# rating by compensation & benefits rating) and its own PDF file.
# Subset: no bureaucracy in title.
myHeatmap(DT[bureau_title == 0], pdf_name = "Figure3a.pdf")
# Subset: bureaucracy in title.
myHeatmap(DT[bureau_title == 1], pdf_name = "Figure3b.pdf")
# Subset: conflict in title.
myHeatmap(DT[conflict_title == 1], pdf_name = "Figure3c.pdf")
# Subset: stress in title.
myHeatmap(DT[stress_title == 1], pdf_name = "Figure3d.pdf")
# Subset: low pay in title.
myHeatmap(DT[lowpay_title == 1], pdf_name = "Figure3e.pdf")
# Subset: long hours in title.
myHeatmap(DT[longhours_title == 1], pdf_name = "Figure3f.pdf")


#
# Figure 4. Stacked bar plots.
#
f <- "Figure_4.pdf"
pdf(f, width = 8, height = 8)
# 2 x 3 grid filled row by row: one panel per comp_benefits level (1 to 5),
# with the sixth cell reserved for the legend.
par(mar = c(3.1, 2.3, 1.6, 1.1), cex.axis = 1.2, mfrow = c(2, 3))

for (b in 1:5) {
  # Within each bureau_title value, the share of reviews at each overall
  # rating (rows sum to 1), restricted to reviews with comp_benefits == b.
  tab <- DT[comp_benefits == b,
            prop.table(table(bureau_title, overall), margin = 1)]
  # Side-by-side bars in percent. The colours follow the row order of the
  # table; the legend below assumes that order is "not mentioned" then
  # "mentioned" -- TODO confirm both values occur at every level.
  # legend.text is spelled out in full; the short form `legend =` reached it
  # only through partial argument matching.
  barplot(
    100 * tab,
    beside = TRUE,
    col = c("steelblue", "orange"),
    main = paste("Compensation & Benefits", b, if (b == 1) "Star" else "Stars"),
    xlab = "",
    ylab = "",
    axes = FALSE,
    legend.text = FALSE,
    width = 2
  )
  axis(2, las = 2)
  # Add the x-axis title to panels 3, 4 and 5: 4 and 5 form the bottom row and
  # panel 3 sits above the legend cell.
  if (b %in% c(3, 4, 5)) {
    title(xlab = "Overall Star Rating", line = 2, cex.lab = 1.2)
  }
}
# Empty plot for the legend.
plot.new()
legend("center",
       legend = c("Bureaucracy Not Mentioned", "Bureaucracy Mentioned"),
       fill = c("steelblue", "orange"), bty = "n", cex = 1.4)

dev.off()


# Flag runs in which the review file was read with a row limit (debug mode).
if (nrow != -1) {
  warning("\n\n########\nALERT: Not running on full data set!\n########\n")
}